## Replicate code 3_create_vars:
rm(list = ls(all.names = TRUE)) 
gc() 

#data <- as.data.table(read.csv("codes/final_sample.csv"))
data <- readRDS("intermediary_outputs/robustness/data_step_descriptive.Rds")  
data <- as.data.table(data)

data$interview_dummy <- as.numeric(!is.na(data$id_interview))

## Skin groups
data$skin_group <- NA
data$skin_group[which(data$skin_color <=6)] <- 0
data$skin_group[which(data$skin_color >6 & data$skin_color <= 9)] <- 1
data$skin_group[which(data$skin_color > 9)] <- 2

## Dark against light skin color:
data$skin_group_2 <- data$skin_group
data$skin_group_2[which(data$skin_group == 2)] <- 1
table(data$skin_color, data$skin_group_2)

### Correct occupations --------
## Aggregating occupations into fewer categories:
data$occ_father_simple <- NA
data$occ_father_simple[which(data$code_egp_father == 1)] <- 1 # Higher Controllers
data$occ_father_simple[which(data$code_egp_father == 2)] <- 1 # Lower controllers
data$occ_father_simple[which(data$code_egp_father == 3)] <- 2 # Routine Nonmanual
data$occ_father_simple[which(data$code_egp_father == 4)] <- 2 # Lower Sales-Servicess
data$occ_father_simple[which(data$code_egp_father == 5)] <- 3 # Selfemployed with employees
data$occ_father_simple[which(data$code_egp_father == 6)] <- 3 # Seldemployed with no employees
data$occ_father_simple[which(data$code_egp_father == 7)] <- 4 # Manual Supervisor
data$occ_father_simple[which(data$code_egp_father == 8)] <- 5 # Skilled Workers
data$occ_father_simple[which(data$code_egp_father == 9)] <- 6 # Unskilled Workers
data$occ_father_simple[which(data$code_egp_father == 10)] <- 7 # Farm Labor
data$occ_father_simple[which(data$code_egp_father == 11)] <- 3 # Selfemployed Farmer

data$occ_father_simple_no_job <- data$occ_father_simple
data$occ_father_simple_no_job[which(data$no_work_father == 1)] <- 8

data$occ_father_simple_no_na <- data$occ_father_simple
data$occ_father_simple_no_na[which(is.na(data$occ_father_simple) == T)] <- 8

data$occ_father_simple_no_job_no_na <- data$occ_father_simple_no_job
data$occ_father_simple_no_job_no_na[which(is.na(data$occ_father_simple_no_job) == T)] <- 9

## Aggregating occupations into fewer categories - Mother: 
data$occ_mother_simple <- NA
data$occ_mother_simple[which(data$code_egp_mother == 1)] <- 1 # Higher Controllers
data$occ_mother_simple[which(data$code_egp_mother == 2)] <- 1 # Lower controllers
data$occ_mother_simple[which(data$code_egp_mother == 3)] <- 2 # Routine Nonmanual
data$occ_mother_simple[which(data$code_egp_mother == 4)] <- 2 # Lower Sales-Servicess
data$occ_mother_simple[which(data$code_egp_mother == 5)] <- 3 # Selfemployed with employees
data$occ_mother_simple[which(data$code_egp_mother == 6)] <- 3 # Seldemployed with no employees
data$occ_mother_simple[which(data$code_egp_mother == 7)] <- 4 # Manual Supervisor
data$occ_mother_simple[which(data$code_egp_mother == 8)] <- 5 # Skilled Workers
data$occ_mother_simple[which(data$code_egp_mother == 9)] <- 6 # Unskilled Workers
data$occ_mother_simple[which(data$code_egp_mother == 10)] <- 7 # Farm Labor
data$occ_mother_simple[which(data$code_egp_mother == 11)] <- 3 # Selfemployed Farmer

data$occ_mother_simple_no_job <- data$occ_mother_simple
data$occ_mother_simple_no_job[which(data$no_work_mother == 1)] <- 8

data$occ_mother_simple_no_na <- data$occ_mother_simple
data$occ_mother_simple_no_na[which(is.na(data$occ_mother_simple) == T)] <- 8

data$occ_mother_simple_no_job_no_na <- data$occ_mother_simple_no_job
data$occ_mother_simple_no_job_no_na[which(is.na(data$occ_mother_simple_no_job) == T)] <- 9

data[, branco := as.numeric(race == 1)]
data[, pardo := as.numeric(race == 2)]
data[, preto := as.numeric(race == 3)]

## Classroom characteristics:
avg_class_races <- data %>% group_by(class_id) %>%
  summarise(n(), "white" = mean(branco, na.rm = T), "brown" = mean(pardo, na.rm = T), 
            "black" = mean(preto, na.rm = T), "skin" = mean(skin_color, na.rm = T),
            "male" = mean(male, na.rm = T), "male_imp" = mean(male_imputed, na.rm = T),
            "missing_interview" = mean(interview_dummy, na.rm = T),
            "mean_grades_score" = NA, 
            "max_grades_score" = NA,
            "min_grades_score" = NA,
            "median_grades_score" = NA,
            "avg_skin_class" = mean(skin_color, na.rm = T),
            "avg_score_racism_class" = mean(score_racism, na.rm = T),
            "avg_score_parents_support_class" = mean(score_parents_support, na.rm = T),
            "avg_score_study_class" = mean(score_study, na.rm = T),
            "avg_score_self_esteem_class" = mean(score_self_esteem, na.rm = T),
            "avg_scores_poverty_class" = mean(scores_poverty, na.rm = T),
            "avg_scores_sdo_1n_class" = mean(scores_sdo_1, na.rm = T),
            "avg_scores_sdo_2_class" = mean(scores_sdo_2, na.rm = T),
            "avg_score_homophobia_class" = mean(score_homophobia, na.rm = T),
            "avg_dominance_score_class" = mean(dominance_score, na.rm = T),
            "avg_anti_equality_score_class" = mean(anti_equality_score, na.rm = T),
            "avg_score_climate_school_class" = mean(score_climate_school, na.rm = T),
            "avg_score_violence_school_class" = mean(score_violence_school, na.rm = T),
            "avg_score_neighborhood_quality_class" = mean(score_neighborhood_quality, na.rm = T),
  )

data$class_size <- NA
data$sh_white_class <- NA
data$sh_brown_class <- NA
data$sh_black_class <- NA
data$sh_nw_class <- NA
data$skin_color_class <- NA
data$sh_males_class <- NA
data$sh_males_imputed_class <- NA
data$sh_missing_interview_class <- NA

data$avg_grades_score_class <- NA
data$max_grades_score_class <- NA
data$min_grades_score_class <- NA
data$median_grades_score_class <- NA
data$avg_skin_class <- NA
data$avg_score_racism_class <- NA
data$avg_score_parents_support_class <- NA
data$avg_score_study_class <- NA
data$avg_score_self_esteem_class <- NA
data$avg_scores_poverty_class <- NA
data$avg_scores_sdo_1_class <- NA
data$avg_scores_sdo_2_class <- NA
data$avg_score_homophobia_class <- NA
data$avg_dominance_score_class <- NA
data$avg_anti_equality_score_class <- NA
data$avg_score_climate_school_class <- NA
data$avg_score_violence_school_class <- NA
data$avg_score_overall_violence_school_class <- NA
data$avg_score_neighborhood_quality_class <- NA

for (i in 1:nrow(avg_class_races)) {
  data$class_size[which(data$class_id == i)] <- as.numeric(avg_class_races[i,2])     
  data$sh_white_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,3])     
  data$sh_brown_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,4]     )
  data$sh_black_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,5]     )
  data$sh_nw_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,4] + avg_class_races[i,5]      )
  data$skin_color_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,6]     )
  data$sh_males_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,7]     )
  data$sh_males_imputed_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,8]     )
  data$sh_missing_interview_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,9]     )
  data$avg_grades_score_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,10]     )
  data$max_grades_score_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,11]     )
  data$min_grades_score_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,12]     )
  data$median_grades_score_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,13]     )
  data$avg_skin_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,14]     )
  data$avg_score_racism_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,15]     )
  data$avg_score_parents_support_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,16]     )
  data$avg_score_study_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,17]     )
  data$avg_score_self_esteem_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,18]     )
  data$avg_scores_poverty_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,19]     )
  data$avg_scores_sdo_1_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,20]     )
  data$avg_scores_sdo_2_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,21]     )
  data$avg_score_homophobia_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,22]     )
  data$avg_dominance_score_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,23]     )
  data$avg_anti_equality_score_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,24]     )
  data$avg_score_climate_school_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,25]     )
  data$avg_score_violence_school_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,26]     )
  data$avg_score_neighborhood_quality_class[which(data$class_id == i)] <- as.numeric(avg_class_races[i,27]     )
  
}


## Replicate code 4_compute_index --------------

# Load network data
## Recall that we have to use the original networks, without excluding
## any observation!
list_networks_strong <- readRDS("list_anonymous_networks.Rds")

# Load supporting functions
source("04a_supporting_functions_en.R", encoding = "UTF-8")

# Compute social status across all races
final_scores_2 <- do.call(rbind, lapply(list_networks_strong, popularity_index))

data$ssi_all_race_2 <- NA

for (i in unique(final_scores_2[,1])) {
  data$ssi_all_race_2[which(data$id_student == i)] <- final_scores_2[which(final_scores_2[,1] == i),2]  
}

data$ssi_all_race_2[which(is.na(data$id_interview) == T)] <- NA

# Standardize measure:
selection_criteria <- which(is.na(data$id_interview) == F)
data$ssi_all_race_2_std <- std_vector(data$ssi_all_race_2)

## Computing number of friends 
result_get_friends_2 <- lapply(list_networks_strong, get_friends)
list_friends_2 <- list()

for (i in 1:length(result_get_friends_2)) {
  list_networks_strong[[i]] <- result_get_friends_2[[i]][[1]]
  list_friends_2[[i]] <- result_get_friends_2[[i]][[2]]
  
}

## Inputing values into matrix 
list_friends_2 <- as.data.table(do.call(rbind, list_friends_2))  

colnames(list_friends_2) <- c("id_student", "friends_2", "white_friends_2", 
                              "black_friends_2", "brown_friends_2", "other_friends_2", 
                              "nonwhite_friends_2", "diff_race_friends_2", 
                              "diff_nw_friends_2", "same_race_friends_2", 
                              "same_race_nw_friends_2")

data <- merge.data.table(data, list_friends_2, by = "id_student", all.x = T)

# Compute share of each race friends 
data$sh_wh_fr_2 <- data$white_friends_2/data$friends_2
data$sh_nw_fr_2 <- (data$black_friends_2 + data$brown_friends_2)/data$friends_2
data$sh_br_fr_2 <- data$brown_friends_2/data$friends_2
data$sh_bl_fr_2 <- data$black_friends_2/data$friends_2

## Compute Social Status within race 

# Compute the social status for the white vs. nonwhite (with NA for others):
ssi_same_negro_na_2 <- unlist(sapply(list_networks_strong, ssi_net, "negro_na"))
data$ssi_same_negro_na_2 <- NA


for (i in unique(as.numeric(names(ssi_same_negro_na_2)))) {
  data$ssi_same_negro_na_2[which(data$id_student == i)] <- 
    ssi_same_negro_na_2[which(as.numeric(names(ssi_same_negro_na_2))== i)]
}

# Standardize
data$ssi_same_negro_na_2[which(is.na(data$id_interview) == T)] <- NA
data$ssi_same_negro_na_2_std <- std_vector(data$ssi_same_negro_na_2)

## Compute social status index based on other race (nonwhite vs white only)
# Here I have a problem with monoracial classrooms. I have to add a little
# check ("trial") in the function so it will avoid these classrooms.

ssi_net_other_2_race_mono <- function(g, vattr) {
  
  print(unique(network::get.vertex.attribute(g, "school_id")))
  print(unique(network::get.vertex.attribute(g, "class_id")))
  
  gg <-  network(network::as.matrix.network.adjacency(g) , directed = T)
  gg %v% vattr <- network::get.vertex.attribute(g, vattr)
  degs <- sna::degree(gg, cmode = "outdegree")
  
  a <- network::get.vertex.attribute(gg, vattr)
  ssi_other <- rep(NA, network.size(gg))
  names(ssi_other) <- network.vertex.names(gg)
  
  original_ids <- get.vertex.attribute(gg,vattr)
  
  for (ind in 1:network.size(gg)) {
    # ind = 1
    set.vertex.attribute(gg,vattr, value = original_ids)
    ids <- get.vertex.attribute(gg,vattr)
    my_id <- network.vertex.names(gg)[ind]
    
    if (is.na(ids[ind]) == T) {
      print("Do nothing")
    } else if (ids[ind] == 1) {
      ids[ind] <- 0
    } else if (ids[ind] == 0) {
      ids[ind] <- 1
    }
    
    set.vertex.attribute(gg,vattr, value = ids)
    get.vertex.attribute(gg,vattr)
    size_problem <- length(get.vertex.attribute(gg,vattr))
    diversity <- length(table(get.vertex.attribute(gg,vattr)))
    
    # The "trial":
    trial <- get.vertex.attribute(gg,vattr)
    trial <- trial[which(is.na(trial) == F )]
    
    
    if (diversity == 1) {
      ssi_other[ind] <- 0
    } else if (size_problem == 2) {
      ssi_other[ind] <- 0
    } else if (length(unique(trial)) == 1 | length(trial)  <= 2) {
      ssi_other[ind] <- 0
    } else {
      l <- unlist(lapply(unique(a), 
                         function(val) ssib_net(g = gg, vattr = vattr, b = val)))
      
      l[ order(as.numeric(names(l)))]
      ssi_other[ind] <- l[which(names(l) == my_id)] 
    }
  }
  
  return(ssi_other)
}

ssi_other_race_negro_na_2 <- 
  unlist(sapply(list_networks_strong, ssi_net_other_2_race_mono, "negro_na"))

data$ssi_other_race_negro_na_2 <- NA

for (i in unique(as.numeric(names(ssi_other_race_negro_na_2)))) {
  data$ssi_other_race_negro_na_2[which(data$id_student == i)] <- 
    ssi_other_race_negro_na_2[which(as.numeric(names(ssi_other_race_negro_na_2))== i)]
}

# Standardize measure
data$ssi_other_race_negro_na_2[which(is.na(data$id_interview) == T)] <- NA
data$ssi_other_race_negro_na_2_std <- std_vector(data$ssi_other_race_negro_na_2)

## Replicate code 5_grades_score -----------
## Correct the grades of problematic observations:    
# Some kids in Elementary school have grades for High school only classes. Correct it:
problem_ef <- which(data$high_school == 0 & is.na(data$FIS_grade) == F )

# Input the average of those grades into the science grade:
data$CIEN_grade[problem_ef] <- (data$FIS_grade[problem_ef] + data$QUI_grade[problem_ef] +data$BIO_grade[problem_ef] + 
                                  data$SOC_grade[problem_ef] + data$FIL_grade[problem_ef])/5 

# Then replace their grades with NA
data$FIS_grade[which(data$high_school == 0 & is.na(data$FIS_grade) == F )] <- NA
data$QUI_grade[which(data$high_school == 0 & is.na(data$QUI_grade) == F )] <- NA
data$BIO_grade[which(data$high_school == 0 & is.na(data$BIO_grade) == F )] <- NA
data$FIL_grade[which(data$high_school == 0 & is.na(data$FIL_grade) == F )] <- NA
data$SOC_grade[which(data$high_school == 0 & is.na(data$SOC_grade) == F )] <- NA

# Other kids in High School had grades for science, but not for Physics and so on...
# For these ones, I will replace the grades in the high-school only subjects with the average
# grades in all other subjects:
problem_em <- which(data$high_school == 1 & is.na(data$CIEN_grade) == F)

data$FIS_grade[problem_em] <- (data$LP_grade[problem_em] + data$MAT_grade[problem_em] + data$HIST_grade[problem_em] + 
                                 data$GEO_grade[problem_em] + data$ING_grade[problem_em] + data$ART_grade[problem_em])/6
data$BIO_grade[problem_em] <- (data$LP_grade[problem_em] + data$MAT_grade[problem_em] + data$HIST_grade[problem_em] + 
                                 data$GEO_grade[problem_em] + data$ING_grade[problem_em] + data$ART_grade[problem_em])/6
data$QUI_grade[problem_em] <- (data$LP_grade[problem_em] + data$MAT_grade[problem_em] + data$HIST_grade[problem_em] + 
                                 data$GEO_grade[problem_em] + data$ING_grade[problem_em] + data$ART_grade[problem_em])/6
data$FIL_grade[problem_em] <- (data$LP_grade[problem_em] + data$MAT_grade[problem_em] + data$HIST_grade[problem_em] + 
                                 data$GEO_grade[problem_em] + data$ING_grade[problem_em] + data$ART_grade[problem_em])/6
data$SOC_grade[problem_em] <- (data$LP_grade[problem_em] + data$MAT_grade[problem_em] + data$HIST_grade[problem_em] + 
                                 data$GEO_grade[problem_em] + data$ING_grade[problem_em] + data$ART_grade[problem_em])/6


## Scores excluding students that do not fit the selection criteria and that were not interviewed: ----
var_list <- c("LP", "MAT", "HIST", "GEO", "EDFIS", 
              "ING", "ART", "FIS", "QUI", "BIO",
              "FIL", "SOC", "CIEN")

## Recall that here there should be no restriction to the individuals for 
## which we compute the index.
for (vars in var_list){
  data[is.na(get(paste0(vars, "_grade"))) == F,
       paste0("grade_", vars, "_std_class") := 
         as.vector(scale(get(paste0(vars, "_grade")))), 
       by = .(class_id)]
}

### Gen dataset for EFA analysis, to create the score for grades: ------
data_grades_EM <- data %>% 
  subset(high_school == 1 & is.na(id_student) == F,
         select = c(id_student, grade_LP_std_class,  grade_MAT_std_class, grade_HIST_std_class,
                    grade_GEO_std_class, grade_ING_std_class,
                    grade_ART_std_class, grade_FIS_std_class, grade_QUI_std_class,
                    grade_BIO_std_class, grade_FIL_std_class, grade_SOC_std_class))  

data_grades_EF <- data %>% 
  subset(high_school == 0 & is.na(id_student) == F,
         select = c(id_student,grade_LP_std_class,  grade_MAT_std_class, grade_HIST_std_class,
                    grade_GEO_std_class, grade_ING_std_class,
                    grade_ART_std_class, grade_CIEN_std_class))  

data_grades_EM_with_edfis <- data %>% 
  subset(high_school == 1 & is.na(id_student) == F,
         select = c(grade_LP_std_class,  grade_MAT_std_class, grade_HIST_std_class,
                    grade_GEO_std_class, grade_ING_std_class,
                    grade_ART_std_class, grade_FIS_std_class, grade_QUI_std_class,
                    grade_BIO_std_class, grade_FIL_std_class, grade_SOC_std_class, grade_EDFIS_std_class))

data_grades_EF_with_edfis <- data %>% 
  subset(high_school == 0 & is.na(id_student) == F,
         select = c(grade_LP_std_class,  grade_MAT_std_class, grade_HIST_std_class,
                    grade_GEO_std_class, grade_ING_std_class,
                    grade_ART_std_class, grade_CIEN_std_class, grade_EDFIS_std_class))  

fa_ef <- fa(data_grades_EF[,2:ncol(data_grades_EF)], nfactor = 1)
fa_em <- fa(data_grades_EM[,2:ncol(data_grades_EM)], nfactor = 1)

data$grades_score <- NA
data$grades_score[which(data$id_student %in% data_grades_EM$id_student)] <- fa_em$scores
data$grades_score[which(data$id_student %in% data_grades_EF$id_student)] <- fa_ef$scores


## Supply of friends:
data$sd_grades_score_fr_2 <- NA
data$supply_1_sd_fr_2 <- NA
data$supply_2_sd_fr_2 <- NA
data$supply_3_sd_fr_2 <- NA
data$friends_1_sd_fr_2 <- NA
data$friends_2_sd_fr_2 <- NA
data$friends_3_sd_fr_2 <- NA
data$supply_1_sd_same_race_fr_2 <- NA
data$supply_2_sd_same_race_fr_2 <- NA
data$supply_3_sd_same_race_fr_2 <- NA
data$friends_1_sd_same_race_fr_2 <- NA
data$friends_2_sd_same_race_fr_2 <- NA
data$friends_3_sd_same_race_fr_2 <- NA

for (j in 1:length(list_networks_strong)) {
  # j = 4
  my_class_id <- as.numeric(network.vertex.names(list_networks_strong[[j]]))
  class_size <- length(my_class_id)
  
  for (i in 1:class_size) {
    # i = 1
    myneigh <- get.neighborhood(list_networks_strong[[j]], v = i)
    myid    <- as.numeric(network::network.vertex.names(list_networks_strong[[j]])[i])
    myrace  <- network::get.vertex.attribute(list_networks_strong[[j]], attrname = "race_simple")[i]
    myrace_nw  <- network::get.vertex.attribute(list_networks_strong[[j]], attrname = "negro_na")[i]
    my_friends_race <- network::get.vertex.attribute(list_networks_strong[[j]], attrname = "race_simple")[myneigh]
    my_friends <- length(myneigh)
    my_friends_id <- as.numeric(network::network.vertex.names(list_networks_strong[[j]])[myneigh])
    my_score <- data$grades_score[which(data$id_student == myid)]
    
    # Input SD of friends grades
    data$sd_grades_score_fr_2[which(data$id_student == myid)] <- sd(data$grades_score[which(data$id_student %in% my_friends_id)], na.rm = T)
    
    # Number of friends within 1 sd from my score:
    data$friends_1_sd_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_friends_id &
                                                                                               data$grades_score >= my_score - .5 &
                                                                                               data$grades_score <= my_score + .5 )])
    
    # Number of friends within 2 sd from my score:
    data$friends_2_sd_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_friends_id &
                                                                                               data$grades_score >= my_score - 1 &
                                                                                               data$grades_score <= my_score + 1 )])
    # Number of friends within 3 sd from my score:
    data$friends_3_sd_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_friends_id &
                                                                                               data$grades_score >= my_score - 1.5 &
                                                                                               data$grades_score <= my_score + 1.5 )])
    # Number of classmates within 1 sd from my score:
    data$supply_1_sd_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_class_id &
                                                                                              data$grades_score >= my_score - .5 &
                                                                                              data$grades_score <= my_score + .5 )])
    
    # Number of classmates within 2 sd from my score:
    data$supply_2_sd_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_class_id &
                                                                                              data$grades_score >= my_score - 1 &
                                                                                              data$grades_score <= my_score + 1 )])
    # Number of classmates within 3 sd from my score:
    data$supply_3_sd_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_class_id &
                                                                                              data$grades_score >= my_score - 1.5 &
                                                                                              data$grades_score <= my_score + 1.5 )])
    # Number of classmates within 1 sd from my score of same race:
    data$supply_1_sd_same_race_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_class_id &
                                                                                                        data$grades_score >= my_score - .5 &
                                                                                                        data$grades_score <= my_score + .5 &
                                                                                                        data$negro_na == myrace_nw )])
    
    # Number of classmates within 2 sd from my score of same race:
    data$supply_2_sd_same_race_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_class_id &
                                                                                                        data$grades_score >= my_score - 1 &
                                                                                                        data$grades_score <= my_score + 1 &
                                                                                                        data$negro_na == myrace_nw)])
    # Number of classmates within 3 sd from my score of same race:
    data$supply_3_sd_same_race_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_class_id &
                                                                                                        data$grades_score >= my_score - 1.5 &
                                                                                                        data$grades_score <= my_score + 1.5 &
                                                                                                        data$negro_na == myrace_nw)])
    # Number of friends within 1 sd from my score of same race:
    data$friends_1_sd_same_race_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_friends_id &
                                                                                                         data$grades_score >= my_score - .5 &
                                                                                                         data$grades_score <= my_score + .5 &
                                                                                                         data$negro_na == myrace_nw )])
    
    # Number of friends within 2 sd from my score of same race:
    data$friends_2_sd_same_race_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_friends_id &
                                                                                                         data$grades_score >= my_score - 1 &
                                                                                                         data$grades_score <= my_score + 1 &
                                                                                                         data$negro_na == myrace_nw)])
    # Number of friends within 3 sd from my score of same race:
    data$friends_3_sd_same_race_fr_2[which(data$id_student == myid)] <- length(data$grades_score[which(data$id_student %in% my_friends_id &
                                                                                                         data$grades_score >= my_score - 1.5 &
                                                                                                         data$grades_score <= my_score + 1.5 &
                                                                                                         data$negro_na == myrace_nw)])
    
  }
}


# Share of friends within 1 sd from own score:
data$sh_friends_1_sd_grade <- data$friends_1_sd_fr_2 / data$friends_2
data$sh_friends_1_sd_grade[which(is.na(data$friends_2) == T)] <- NA
data$sh_friends_1_sd_grade[which(data$friends_2 == 0)] <- NA

# Share of friends within 2 sd from own score:
data$sh_friends_2_sd_grade <- data$friends_2_sd_fr_2 / data$friends_2
data$sh_friends_2_sd_grade[which(is.na(data$friends_2) == T)] <- NA
data$sh_friends_2_sd_grade[which(data$friends_2 == 0)] <- NA

# Share of friends within 2 sd from own score:
data$sh_friends_3_sd_grade <- data$friends_3_sd_fr_2 / data$friends_2
data$sh_friends_3_sd_grade[which(is.na(data$friends_2) == T)] <- NA
data$sh_friends_3_sd_grade[which(data$friends_2 == 0)] <- NA

data$sh_nw_class_std <- (data$sh_nw_class - mean(data$sh_nw_class, na.rm = T))/sd(data$sh_nw_class, na.rm = T)

data <- data %>% group_by(class_id) %>%
  mutate(sh_nw_class_special = (sum(negro_na, na.rm = T) - negro_na)/(sum(is.na(negro_na) == F)-1))

data$sh_nw_class_special_std <- (data$sh_nw_class_special - mean(data$sh_nw_class_special, na.rm = T)/
                                   sd(data$sh_nw_class_special, na.rm = T))


#### Save data that I will use later: #####################
fwrite(data, file = "intermediary_outputs/robustness/robust_base_with_connection_data.csv")
